# This code is to conduct robustness check for the spell analysis
# Created by Shuyi Qiu
# Generated on May. 8th, 2024
# Last edited on May. 8th, 2024

# Set up ----
set.seed(27705)
setwd("/Users/RachelChiu/Documents/ReProj/PSID/spell")
library(tidyverse)
library(ggplot2)
library(psidread)
library(dplyr)
library(haven)
library(readxl)
library(data.table)
library(Amelia)
library(janitor)
library(mitools)
library(mice)
library(weights)
library(radiant)

# Redo the imputation with pension account ----
load("AddData/pension_imp.RData")
forimp <- forimp |> 
  left_join(pension_imp |> mutate(year = as.numeric(year)) |> select(-xsqnr), by = c("year","indfid"))

id_cols <- c("p_gender","why_nonresp","p_age",
             "p_educ", "p_employ", "xsqnr", "rel2hh", "indfid",
             "entry_year", "entry_age", "exit_year", "exit_age", "ms_year", "ms_age",
             "fu_year", "missed_year")
logs_cols <- c("tassets","tdebts","pension_acc","homevalue", "vehicles","inc", "pwt", "hs_completed", "clg_enrolled")
noms_cols <- c("hh_sex",  "hh_poorhealth", "h_foodstamps", "is_first_born","h_livewgp", "hh_parents","hh_gp","hh_marital", "hh_racehisp")
ords_cols <- c("n_children", "n_adults", "hh_age", "hh_educ")
forimp <- forimp |> mutate(pwt = ifelse(pwt == 0, NA, pwt))
final_forimp <- forimp[,c("pid", "year", id_cols, logs_cols, noms_cols, ords_cols)]
along.out <- amelia(final_forimp, m = 5, p2s = 1,
                    idvars = id_cols,
                    ts = "year", cs = "pid",
                    ords = ords_cols,
                    noms = noms_cols,
                    logs = logs_cols,
                    polytime = 1)

# RC1. Include vehicles and pensions ----
imp_long_df <- data.frame()
for (i in c(1:5)){
  cdoc_imp <- along.out$imputations[[i]] |> 
    clean_names() |> 
    mutate(.imp = i)
  
  cov_df <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_children_gt3 = ifelse(n_children >= 3, T, F),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |> 
    mutate(nwp = ifelse((tassets - tdebts + homevalue + pension_acc)*Ratio21_nw <= w_threshold, T, F),
           incp = ifelse(inc*Ratio21_inc <= inc_threshold, T, F)) |> 
    group_by(pid) |> 
    summarise(
      total_wave = sum(fu_year == T),
      
      n_children_gt3_prop = sum(n_children_gt3 == T)/total_wave,
      
      edu_hh_lths_prop = sum(hh_educ <= 12)/total_wave,
      edu_hh_hsc_prop = sum(hh_educ > 12 & hh_educ < 16)/total_wave,
      edu_hh_gtba_prop = sum(hh_educ >= 16)/total_wave,
      
      age_hh_lt35_prop = sum(hh_age < 35)/total_wave,
      age_hh_gt35_prop = sum(hh_age >= 35)/total_wave,
      
      poorhealth_hh_prop = sum(hh_poorhealth == 1)/total_wave,
      
      h_gender_f_prop = sum(hh_sex == "F")/total_wave,
      
      h_race_nhwhite_prop = sum(hh_racehisp == "White")/total_wave,
      h_race_nhblack_prop = sum(hh_racehisp == "Black")/total_wave,
      h_race_hispanic_prop = sum(hh_racehisp == "Hispanic")/total_wave,
      h_race_other_prop = sum(hh_racehisp == "Others")/total_wave,
      
      h_marriage_married_prop = sum(hh_marital == "Married")/total_wave,
      h_marriage_single_prop = sum(hh_marital == "Single")/total_wave,
      h_marriage_widowed_prop = sum(hh_marital == "Widowed")/total_wave,
      h_marriage_div_prop = sum(hh_marital == "Divorced or Separated")/total_wave,
      h_marriage_nonmarried_prop = sum(hh_marital != "Married")/total_wave,
      
      foodstamps_used_prop = sum(h_foodstamps == 1)/total_wave,
      
      with_gp_true_prop = sum(h_livewgp == 1)/total_wave,
      hh_parents_true_prop = sum(hh_parents == 1)/total_wave,
      hh_gp_true_prop = sum(hh_gp == 1)/total_wave,
      
      incp_prop = sum(incp == 1)/total_wave,
      
      pwt = mean(pwt)
    )
  
  pov_desc <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |> 
    mutate(nwp = ifelse((tassets - tdebts + homevalue + pension_acc)*Ratio21_nw <= w_threshold, T, F),
           incp = ifelse(inc*Ratio21_inc <= inc_threshold, T, F)) |> 
    group_by(pid, .imp) |> 
    summarise(
      num_nwp = sum(nwp),
      num_fu = n(),
      ever_nwp = ifelse(num_nwp > 0, T, F),
      pct_nwp = num_nwp/num_fu,
      num_incp = sum(incp),
      ever_incp = ifelse(num_incp > 0, T, F),
      pct_incp = num_incp/num_fu,
    )
  
  nwpov_desc <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(nwp = ifelse((tassets - tdebts + homevalue + pension_acc)*Ratio21_nw <= w_threshold, T, F)) |> 
    #filter(indfid != 0) |> 
    group_by(pid) |> 
    mutate(grp = rleid(nwp)) |> 
    filter(nwp == TRUE) |> 
    group_by(pid, grp) |> 
    summarise(spell_length = n()) |> 
    group_by(pid) |> 
    summarise(longest_spell_nwp = max(spell_length),
              num_spell_nwp = n())
  
  incpov_desc <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(incp = ifelse(inc*Ratio21_inc <= inc_threshold, T, F)) |> 
    #filter(indfid != 0) |> 
    group_by(pid) |> 
    mutate(grp = rleid(incp)) |> 
    filter(incp == TRUE) |> 
    group_by(pid, grp) |> 
    summarise(spell_length = n()) |> 
    group_by(pid) |> 
    summarise(longest_spell_incp = max(spell_length),
              num_spell_incp = n())
  
  nwpov_pct_byage <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(nwp = ifelse((tassets - tdebts + homevalue + pension_acc)*Ratio21_nw <= w_threshold, T, F),
           p_age = ifelse(p_age == 0 | p_age > 100, NA, p_age), 
           p_age_imp = entry_age + year - entry_year,
           p_age_comp = ifelse(is.na(p_age), p_age_imp, p_age),
           age_cat = as.factor(case_when(
             p_age_comp >= 0 & p_age_comp <= 6 ~ "0to6",
             p_age_comp > 6 & p_age_comp <= 12 ~ "6to12",
             p_age_comp > 12 ~ "12to18"
           ))) |> 
    group_by(pid, age_cat) |> 
    summarise(
      total_wave = n(),
      nwp_pct = sum(nwp)/total_wave 
    ) |> 
    select(-total_wave) |> 
    pivot_wider(
      names_from = age_cat,
      values_from = nwp_pct,
      names_prefix = "nwp_pct"
    )
  
  incpov_pct_byage <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(incp = ifelse(inc*Ratio21_inc <= inc_threshold, T, F),
           p_age = ifelse(p_age == 0 | p_age > 100, NA, p_age), 
           p_age_imp = entry_age + year - entry_year,
           p_age_comp = ifelse(is.na(p_age), p_age_imp, p_age),
           age_cat = as.factor(case_when(
             p_age_comp >= 0 & p_age_comp <= 6 ~ "0to6",
             p_age_comp > 6 & p_age_comp <= 12 ~ "6to12",
             p_age_comp > 12 ~ "12to18"
           ))) |> 
    group_by(pid, age_cat) |> 
    summarise(
      total_wave = n(),
      incp_pct = sum(incp)/total_wave 
    ) |> 
    select(-total_wave) |> 
    pivot_wider(
      names_from = age_cat,
      values_from = incp_pct,
      names_prefix = "incp_pct"
    )
  
  nwpov_share_byage <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(nwp = ifelse((tassets - tdebts + homevalue + pension_acc)*Ratio21_nw <= w_threshold, T, F),
           p_age = ifelse(p_age == 0 | p_age > 100, NA, p_age), 
           p_age_imp = entry_age + year - entry_year,
           p_age_comp = ifelse(is.na(p_age), p_age_imp, p_age),
           age_cat = as.factor(case_when(
             p_age_comp >= 0 & p_age_comp <= 6 ~ "0to6",
             p_age_comp > 6 & p_age_comp <= 12 ~ "6to12",
             p_age_comp > 12 ~ "12to18"
           ))) |> 
    group_by(pid) |> 
    mutate(num_nwp = sum(nwp)) |> 
    group_by(pid, age_cat) |> 
    summarise(
      nwp_share = sum(nwp)/mean(num_nwp),
      nwp_share = ifelse(is.nan(nwp_share), 0, nwp_share)
    ) |> 
    pivot_wider(
      names_from = age_cat,
      values_from = nwp_share,
      names_prefix = "nwp_share"
    )
  
  incpov_share_byage <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(incp = ifelse(inc*Ratio21_inc <= inc_threshold, T, F),
           p_age = ifelse(p_age == 0 | p_age > 100, NA, p_age), 
           p_age_imp = entry_age + year - entry_year,
           p_age_comp = ifelse(is.na(p_age), p_age_imp, p_age),
           age_cat = as.factor(case_when(
             p_age_comp >= 0 & p_age_comp <= 6 ~ "0to6",
             p_age_comp > 6 & p_age_comp <= 12 ~ "6to12",
             p_age_comp > 12 ~ "12to18"
           ))) |> 
    group_by(pid) |> 
    mutate(num_incp = sum(incp)) |> 
    group_by(pid, age_cat) |> 
    summarise(
      incp_share = sum(incp)/mean(num_incp),
      incp_share = ifelse(is.nan(incp_share), 0, incp_share)
    ) |> 
    pivot_wider(
      names_from = age_cat,
      values_from = incp_share,
      names_prefix = "incp_share"
    )
  
  cdoc_df <- cdoc_imp |> 
    filter(year == ms_year) |> 
    select(pid, clg_enrolled, hs_completed, pwt) |> 
    rename(pwt_ms = pwt)
  
  demo_df <- cdoc_imp |> 
    filter(year == entry_year) |> 
    select(pid, hh_racehisp, hh_age, hh_educ, hh_marital) |> 
    rename(hh_racehisp_entry = hh_racehisp,
           hh_age_entry = hh_age
    ) |> 
    mutate(hh_age_gt35_entry = ifelse(hh_age_entry > 35, T, F),
           hh_educ_entry = as.factor(case_when(
             hh_educ < 12 ~ "lths",
             hh_educ >= 12 & hh_educ < 16 ~ "hsc",
             hh_educ >= 16 ~ "gtba"
           )),
           hh_marital_entry = ifelse(hh_marital == "Married", T, F)) |> 
    select(pid, hh_racehisp_entry, hh_age_gt35_entry, hh_educ_entry, hh_marital_entry)
  
  imp_df <- sample_id |> 
    left_join(nwpov_desc, by = "pid") |> 
    left_join(incpov_desc, by = "pid") |> 
    left_join(cdoc_df, by = "pid") |> 
    left_join(pov_desc, by = "pid") |> 
    left_join(cov_df, by = "pid") |> 
    left_join(cdoc_imp |> filter(year == entry_year) |> select(pid, is_first_born, p_gender), by = "pid") |> 
    left_join(nwpov_pct_byage, by = "pid") |> 
    left_join(nwpov_share_byage, by = "pid") |> 
    left_join(incpov_pct_byage, by = "pid") |> 
    left_join(incpov_share_byage, by = "pid") |> 
    left_join(demo_df, by = "pid") |> 
    mutate(longest_spell_nwp = ifelse(is.na(longest_spell_nwp), 0, longest_spell_nwp),
           longest_spell_incp = ifelse(is.na(longest_spell_incp), 0, longest_spell_incp),
           num_spell_nwp = ifelse(is.na(num_spell_nwp), 0, num_spell_nwp),
           num_spell_incp = ifelse(is.na(num_spell_incp), 0, num_spell_incp))
  
  imp_long_df <- imp_long_df %>%
    bind_rows(imp_df)
}

getImpList <- function(MIdata, impIDColumn){
  d <- list()
  imps <- max(MIdata[,impIDColumn])
  for(i in 1:imps){
    d[[i]] <- MIdata[MIdata[,impIDColumn]==i,]
  }
  return(d)
}

library(mitools)
temp <- imp_long_df |> 
  mutate(consec_nwp = factor(case_when(
    num_spell_nwp == 0 & num_nwp == 0 ~ "Never",
    num_spell_nwp == 1 & num_nwp == 1 ~ "One wave",
    num_spell_nwp == 1 & num_nwp > 1 & pct_nwp <= 0.5 ~ "Short consec",
    num_spell_nwp == 1 & num_nwp > 1 & pct_nwp > 0.5 ~ "Long consec", 
    num_spell_nwp > 1 & pct_nwp <= 0.5 ~ "Short fluc",
    num_spell_nwp > 1 & pct_nwp > 0.5 ~ "Long fluc"
  ), levels = c("Never","One wave","Short fluc","Short consec","Long fluc","Long consec")),
  consec_incp = factor(case_when(
    num_spell_incp == 0 & num_incp == 0 ~ "Never",
    num_spell_incp == 1 & num_incp == 1 ~ "One wave",
    num_spell_incp == 1 & num_incp > 1 & pct_incp <= 0.5 ~ "Short consec",
    num_spell_incp == 1 & num_incp > 1 & pct_incp > 0.5 ~ "Long consec", 
    num_spell_incp > 1 & pct_incp <= 0.5 ~ "Short fluc",
    num_spell_incp > 1 & pct_incp > 0.5 ~ "Long fluc"
  ), levels = c("Never","One wave","Short fluc","Short consec","Long fluc","Long consec")))

temp$consec_nwp <- relevel(temp$consec_nwp, ref = "Never")
temp$consec_incp <- relevel(temp$consec_incp, ref = "Never")
temp$hh_racehisp_entry <- relevel(temp$hh_racehisp_entry, ref = "White")
temp$hh_educ_entry <- relevel(temp$hh_educ_entry, ref = "lths")
# temp <- temp |> filter(entry_age == 1)
new_imp <- imputationList(getImpList(temp, c(".imp")))

# RC3a. Use 50% As Threshold ----
imp_long_df <- data.frame()
for (i in c(1:5)){
  cdoc_imp <- along.out$imputations[[i]] |> 
    clean_names() |> 
    mutate(.imp = i)
  
  cov_df <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_children_gt3 = ifelse(n_children >= 3, T, F),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |> 
    mutate(nwp = ifelse((tassets - tdebts + homevalue - vehicles)*Ratio21_nw <= (w_threshold*0.5), T, F),
           incp = ifelse(inc*Ratio21_inc <= inc_threshold, T, F)) |> 
    group_by(pid) |> 
    summarise(
      total_wave = sum(fu_year == T),
      
      n_children_gt3_prop = sum(n_children_gt3 == T)/total_wave,
      
      edu_hh_lths_prop = sum(hh_educ <= 12)/total_wave,
      edu_hh_hsc_prop = sum(hh_educ > 12 & hh_educ < 16)/total_wave,
      edu_hh_gtba_prop = sum(hh_educ >= 16)/total_wave,
      
      age_hh_lt35_prop = sum(hh_age < 35)/total_wave,
      age_hh_gt35_prop = sum(hh_age >= 35)/total_wave,
      
      poorhealth_hh_prop = sum(hh_poorhealth == 1)/total_wave,
      
      h_gender_f_prop = sum(hh_sex == "F")/total_wave,
      
      h_race_nhwhite_prop = sum(hh_racehisp == "White")/total_wave,
      h_race_nhblack_prop = sum(hh_racehisp == "Black")/total_wave,
      h_race_hispanic_prop = sum(hh_racehisp == "Hispanic")/total_wave,
      h_race_other_prop = sum(hh_racehisp == "Others")/total_wave,
      
      h_marriage_married_prop = sum(hh_marital == "Married")/total_wave,
      h_marriage_single_prop = sum(hh_marital == "Single")/total_wave,
      h_marriage_widowed_prop = sum(hh_marital == "Widowed")/total_wave,
      h_marriage_div_prop = sum(hh_marital == "Divorced or Separated")/total_wave,
      h_marriage_nonmarried_prop = sum(hh_marital != "Married")/total_wave,
      
      foodstamps_used_prop = sum(h_foodstamps == 1)/total_wave,
      
      with_gp_true_prop = sum(h_livewgp == 1)/total_wave,
      hh_parents_true_prop = sum(hh_parents == 1)/total_wave,
      hh_gp_true_prop = sum(hh_gp == 1)/total_wave,
      
      incp_prop = sum(incp == 1)/total_wave,
      
      pwt = mean(pwt)
    )
  
  pov_desc <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |> 
    mutate(nwp = ifelse((tassets - tdebts + homevalue - vehicles)*Ratio21_nw <= (w_threshold*0.5), T, F),
           incp = ifelse(inc*Ratio21_inc <= inc_threshold, T, F)) |> 
    group_by(pid, .imp) |> 
    summarise(
      num_nwp = sum(nwp),
      num_fu = n(),
      ever_nwp = ifelse(num_nwp > 0, T, F),
      pct_nwp = num_nwp/num_fu,
      num_incp = sum(incp),
      ever_incp = ifelse(num_incp > 0, T, F),
      pct_incp = num_incp/num_fu,
    )
  
  nwpov_desc <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(nwp = ifelse((tassets - tdebts + homevalue - vehicles)*Ratio21_nw <= (w_threshold*0.5), T, F)) |> 
    #filter(indfid != 0) |> 
    group_by(pid) |> 
    mutate(grp = rleid(nwp)) |> 
    filter(nwp == TRUE) |> 
    group_by(pid, grp) |> 
    summarise(spell_length = n()) |> 
    group_by(pid) |> 
    summarise(longest_spell_nwp = max(spell_length),
              num_spell_nwp = n())
  
  incpov_desc <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(incp = ifelse(inc*Ratio21_inc <= inc_threshold, T, F)) |> 
    #filter(indfid != 0) |> 
    group_by(pid) |> 
    mutate(grp = rleid(incp)) |> 
    filter(incp == TRUE) |> 
    group_by(pid, grp) |> 
    summarise(spell_length = n()) |> 
    group_by(pid) |> 
    summarise(longest_spell_incp = max(spell_length),
              num_spell_incp = n())
  
  nwpov_pct_byage <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(nwp = ifelse((tassets - tdebts + homevalue - vehicles)*Ratio21_nw <= (w_threshold*0.5), T, F),
           p_age = ifelse(p_age == 0 | p_age > 100, NA, p_age), 
           p_age_imp = entry_age + year - entry_year,
           p_age_comp = ifelse(is.na(p_age), p_age_imp, p_age),
           age_cat = as.factor(case_when(
             p_age_comp >= 0 & p_age_comp <= 6 ~ "0to6",
             p_age_comp > 6 & p_age_comp <= 12 ~ "6to12",
             p_age_comp > 12 ~ "12to18"
           ))) |> 
    group_by(pid, age_cat) |> 
    summarise(
      total_wave = n(),
      nwp_pct = sum(nwp)/total_wave 
    ) |> 
    select(-total_wave) |> 
    pivot_wider(
      names_from = age_cat,
      values_from = nwp_pct,
      names_prefix = "nwp_pct"
    )
  
  incpov_pct_byage <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(incp = ifelse(inc*Ratio21_inc <= inc_threshold, T, F),
           p_age = ifelse(p_age == 0 | p_age > 100, NA, p_age), 
           p_age_imp = entry_age + year - entry_year,
           p_age_comp = ifelse(is.na(p_age), p_age_imp, p_age),
           age_cat = as.factor(case_when(
             p_age_comp >= 0 & p_age_comp <= 6 ~ "0to6",
             p_age_comp > 6 & p_age_comp <= 12 ~ "6to12",
             p_age_comp > 12 ~ "12to18"
           ))) |> 
    group_by(pid, age_cat) |> 
    summarise(
      total_wave = n(),
      incp_pct = sum(incp)/total_wave 
    ) |> 
    select(-total_wave) |> 
    pivot_wider(
      names_from = age_cat,
      values_from = incp_pct,
      names_prefix = "incp_pct"
    )
  
  nwpov_share_byage <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(nwp = ifelse((tassets - tdebts + homevalue - vehicles)*Ratio21_nw <= (w_threshold*0.5), T, F),
           p_age = ifelse(p_age == 0 | p_age > 100, NA, p_age), 
           p_age_imp = entry_age + year - entry_year,
           p_age_comp = ifelse(is.na(p_age), p_age_imp, p_age),
           age_cat = as.factor(case_when(
             p_age_comp >= 0 & p_age_comp <= 6 ~ "0to6",
             p_age_comp > 6 & p_age_comp <= 12 ~ "6to12",
             p_age_comp > 12 ~ "12to18"
           ))) |> 
    group_by(pid) |> 
    mutate(num_nwp = sum(nwp)) |> 
    group_by(pid, age_cat) |> 
    summarise(
      nwp_share = sum(nwp)/mean(num_nwp),
      nwp_share = ifelse(is.nan(nwp_share), 0, nwp_share)
    ) |> 
    pivot_wider(
      names_from = age_cat,
      values_from = nwp_share,
      names_prefix = "nwp_share"
    )
  
  incpov_share_byage <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(incp = ifelse(inc*Ratio21_inc <= inc_threshold, T, F),
           p_age = ifelse(p_age == 0 | p_age > 100, NA, p_age), 
           p_age_imp = entry_age + year - entry_year,
           p_age_comp = ifelse(is.na(p_age), p_age_imp, p_age),
           age_cat = as.factor(case_when(
             p_age_comp >= 0 & p_age_comp <= 6 ~ "0to6",
             p_age_comp > 6 & p_age_comp <= 12 ~ "6to12",
             p_age_comp > 12 ~ "12to18"
           ))) |> 
    group_by(pid) |> 
    mutate(num_incp = sum(incp)) |> 
    group_by(pid, age_cat) |> 
    summarise(
      incp_share = sum(incp)/mean(num_incp),
      incp_share = ifelse(is.nan(incp_share), 0, incp_share)
    ) |> 
    pivot_wider(
      names_from = age_cat,
      values_from = incp_share,
      names_prefix = "incp_share"
    )
  
  cdoc_df <- cdoc_imp |> 
    filter(year == ms_year) |> 
    select(pid, clg_enrolled, hs_completed, pwt) |> 
    rename(pwt_ms = pwt)
  
  demo_df <- cdoc_imp |> 
    filter(year == entry_year) |> 
    select(pid, hh_racehisp, hh_age, hh_educ, hh_marital) |> 
    rename(hh_racehisp_entry = hh_racehisp,
           hh_age_entry = hh_age
    ) |> 
    mutate(hh_age_gt35_entry = ifelse(hh_age_entry > 35, T, F),
           hh_educ_entry = as.factor(case_when(
             hh_educ < 12 ~ "lths",
             hh_educ >= 12 & hh_educ < 16 ~ "hsc",
             hh_educ >= 16 ~ "gtba"
           )),
           hh_marital_entry = ifelse(hh_marital == "Married", T, F)) |> 
    select(pid, hh_racehisp_entry, hh_age_gt35_entry, hh_educ_entry, hh_marital_entry)
  
  imp_df <- sample_id |> 
    left_join(nwpov_desc, by = "pid") |> 
    left_join(incpov_desc, by = "pid") |> 
    left_join(cdoc_df, by = "pid") |> 
    left_join(pov_desc, by = "pid") |> 
    left_join(cov_df, by = "pid") |> 
    left_join(cdoc_imp |> filter(year == entry_year) |> select(pid, is_first_born, p_gender), by = "pid") |> 
    left_join(nwpov_pct_byage, by = "pid") |> 
    left_join(nwpov_share_byage, by = "pid") |> 
    left_join(incpov_pct_byage, by = "pid") |> 
    left_join(incpov_share_byage, by = "pid") |> 
    left_join(demo_df, by = "pid") |> 
    mutate(longest_spell_nwp = ifelse(is.na(longest_spell_nwp), 0, longest_spell_nwp),
           longest_spell_incp = ifelse(is.na(longest_spell_incp), 0, longest_spell_incp),
           num_spell_nwp = ifelse(is.na(num_spell_nwp), 0, num_spell_nwp),
           num_spell_incp = ifelse(is.na(num_spell_incp), 0, num_spell_incp))
  
  imp_long_df <- imp_long_df %>%
    bind_rows(imp_df)
}

getImpList <- function(MIdata, impIDColumn){
  d <- list()
  imps <- max(MIdata[,impIDColumn])
  for(i in 1:imps){
    d[[i]] <- MIdata[MIdata[,impIDColumn]==i,]
  }
  return(d)
}

library(mitools)
temp <- imp_long_df |> 
  mutate(consec_nwp = factor(case_when(
    num_spell_nwp == 0 & num_nwp == 0 ~ "Never",
    num_spell_nwp == 1 & num_nwp == 1 ~ "One wave",
    num_spell_nwp == 1 & num_nwp > 1 & pct_nwp <= 0.5 ~ "Short consec",
    num_spell_nwp == 1 & num_nwp > 1 & pct_nwp > 0.5 ~ "Long consec", 
    num_spell_nwp > 1 & pct_nwp <= 0.5 ~ "Short fluc",
    num_spell_nwp > 1 & pct_nwp > 0.5 ~ "Long fluc"
  ), levels = c("Never","One wave","Short fluc","Short consec","Long fluc","Long consec")),
  consec_incp = factor(case_when(
    num_spell_incp == 0 & num_incp == 0 ~ "Never",
    num_spell_incp == 1 & num_incp == 1 ~ "One wave",
    num_spell_incp == 1 & num_incp > 1 & pct_incp <= 0.5 ~ "Short consec",
    num_spell_incp == 1 & num_incp > 1 & pct_incp > 0.5 ~ "Long consec", 
    num_spell_incp > 1 & pct_incp <= 0.5 ~ "Short fluc",
    num_spell_incp > 1 & pct_incp > 0.5 ~ "Long fluc"
  ), levels = c("Never","One wave","Short fluc","Short consec","Long fluc","Long consec")))

temp$consec_nwp <- relevel(temp$consec_nwp, ref = "Never")
temp$consec_incp <- relevel(temp$consec_incp, ref = "Never")
temp$hh_racehisp_entry <- relevel(temp$hh_racehisp_entry, ref = "White")
temp$hh_educ_entry <- relevel(temp$hh_educ_entry, ref = "lths")
# temp <- temp |> filter(entry_age == 1)
new_imp <- imputationList(getImpList(temp, c(".imp")))

# RC3b. Use 200% As Threshold ----
imp_long_df <- data.frame()
for (i in c(1:5)){
  cdoc_imp <- along.out$imputations[[i]] |> 
    clean_names() |> 
    mutate(.imp = i)
  
  cov_df <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_children_gt3 = ifelse(n_children >= 3, T, F),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |> 
    mutate(nwp = ifelse((tassets - tdebts + homevalue - vehicles)*Ratio21_nw <= (w_threshold*2), T, F),
           incp = ifelse(inc*Ratio21_inc <= inc_threshold, T, F)) |> 
    group_by(pid) |> 
    summarise(
      total_wave = sum(fu_year == T),
      
      n_children_gt3_prop = sum(n_children_gt3 == T)/total_wave,
      
      edu_hh_lths_prop = sum(hh_educ <= 12)/total_wave,
      edu_hh_hsc_prop = sum(hh_educ > 12 & hh_educ < 16)/total_wave,
      edu_hh_gtba_prop = sum(hh_educ >= 16)/total_wave,
      
      age_hh_lt35_prop = sum(hh_age < 35)/total_wave,
      age_hh_gt35_prop = sum(hh_age >= 35)/total_wave,
      
      poorhealth_hh_prop = sum(hh_poorhealth == 1)/total_wave,
      
      h_gender_f_prop = sum(hh_sex == "F")/total_wave,
      
      h_race_nhwhite_prop = sum(hh_racehisp == "White")/total_wave,
      h_race_nhblack_prop = sum(hh_racehisp == "Black")/total_wave,
      h_race_hispanic_prop = sum(hh_racehisp == "Hispanic")/total_wave,
      h_race_other_prop = sum(hh_racehisp == "Others")/total_wave,
      
      h_marriage_married_prop = sum(hh_marital == "Married")/total_wave,
      h_marriage_single_prop = sum(hh_marital == "Single")/total_wave,
      h_marriage_widowed_prop = sum(hh_marital == "Widowed")/total_wave,
      h_marriage_div_prop = sum(hh_marital == "Divorced or Separated")/total_wave,
      h_marriage_nonmarried_prop = sum(hh_marital != "Married")/total_wave,
      
      foodstamps_used_prop = sum(h_foodstamps == 1)/total_wave,
      
      with_gp_true_prop = sum(h_livewgp == 1)/total_wave,
      hh_parents_true_prop = sum(hh_parents == 1)/total_wave,
      hh_gp_true_prop = sum(hh_gp == 1)/total_wave,
      
      incp_prop = sum(incp == 1)/total_wave,
      
      pwt = mean(pwt)
    )
  
  pov_desc <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |> 
    mutate(nwp = ifelse((tassets - tdebts + homevalue - vehicles)*Ratio21_nw <= (w_threshold*2), T, F),
           incp = ifelse(inc*Ratio21_inc <= inc_threshold, T, F)) |> 
    group_by(pid, .imp) |> 
    summarise(
      num_nwp = sum(nwp),
      num_fu = n(),
      ever_nwp = ifelse(num_nwp > 0, T, F),
      pct_nwp = num_nwp/num_fu,
      num_incp = sum(incp),
      ever_incp = ifelse(num_incp > 0, T, F),
      pct_incp = num_incp/num_fu,
    )
  
  nwpov_desc <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(nwp = ifelse((tassets - tdebts + homevalue - vehicles)*Ratio21_nw <= (w_threshold*2), T, F)) |> 
    #filter(indfid != 0) |> 
    group_by(pid) |> 
    mutate(grp = rleid(nwp)) |> 
    filter(nwp == TRUE) |> 
    group_by(pid, grp) |> 
    summarise(spell_length = n()) |> 
    group_by(pid) |> 
    summarise(longest_spell_nwp = max(spell_length),
              num_spell_nwp = n())
  
  incpov_desc <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(incp = ifelse(inc*Ratio21_inc <= inc_threshold, T, F)) |> 
    #filter(indfid != 0) |> 
    group_by(pid) |> 
    mutate(grp = rleid(incp)) |> 
    filter(incp == TRUE) |> 
    group_by(pid, grp) |> 
    summarise(spell_length = n()) |> 
    group_by(pid) |> 
    summarise(longest_spell_incp = max(spell_length),
              num_spell_incp = n())
  
  nwpov_pct_byage <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(nwp = ifelse((tassets - tdebts + homevalue - vehicles)*Ratio21_nw <= (w_threshold*2), T, F),
           p_age = ifelse(p_age == 0 | p_age > 100, NA, p_age), 
           p_age_imp = entry_age + year - entry_year,
           p_age_comp = ifelse(is.na(p_age), p_age_imp, p_age),
           age_cat = as.factor(case_when(
             p_age_comp >= 0 & p_age_comp <= 6 ~ "0to6",
             p_age_comp > 6 & p_age_comp <= 12 ~ "6to12",
             p_age_comp > 12 ~ "12to18"
           ))) |> 
    group_by(pid, age_cat) |> 
    summarise(
      total_wave = n(),
      nwp_pct = sum(nwp)/total_wave 
    ) |> 
    select(-total_wave) |> 
    pivot_wider(
      names_from = age_cat,
      values_from = nwp_pct,
      names_prefix = "nwp_pct"
    )
  
  incpov_pct_byage <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(incp = ifelse(inc*Ratio21_inc <= inc_threshold, T, F),
           p_age = ifelse(p_age == 0 | p_age > 100, NA, p_age), 
           p_age_imp = entry_age + year - entry_year,
           p_age_comp = ifelse(is.na(p_age), p_age_imp, p_age),
           age_cat = as.factor(case_when(
             p_age_comp >= 0 & p_age_comp <= 6 ~ "0to6",
             p_age_comp > 6 & p_age_comp <= 12 ~ "6to12",
             p_age_comp > 12 ~ "12to18"
           ))) |> 
    group_by(pid, age_cat) |> 
    summarise(
      total_wave = n(),
      incp_pct = sum(incp)/total_wave 
    ) |> 
    select(-total_wave) |> 
    pivot_wider(
      names_from = age_cat,
      values_from = incp_pct,
      names_prefix = "incp_pct"
    )
  
  nwpov_share_byage <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(nwp = ifelse((tassets - tdebts + homevalue - vehicles)*Ratio21_nw <= (w_threshold*2), T, F),
           p_age = ifelse(p_age == 0 | p_age > 100, NA, p_age), 
           p_age_imp = entry_age + year - entry_year,
           p_age_comp = ifelse(is.na(p_age), p_age_imp, p_age),
           age_cat = as.factor(case_when(
             p_age_comp >= 0 & p_age_comp <= 6 ~ "0to6",
             p_age_comp > 6 & p_age_comp <= 12 ~ "6to12",
             p_age_comp > 12 ~ "12to18"
           ))) |> 
    group_by(pid) |> 
    mutate(num_nwp = sum(nwp)) |> 
    group_by(pid, age_cat) |> 
    summarise(
      nwp_share = sum(nwp)/mean(num_nwp),
      nwp_share = ifelse(is.nan(nwp_share), 0, nwp_share)
    ) |> 
    pivot_wider(
      names_from = age_cat,
      values_from = nwp_share,
      names_prefix = "nwp_share"
    )
  
  incpov_share_byage <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(incp = ifelse(inc*Ratio21_inc <= inc_threshold, T, F),
           p_age = ifelse(p_age == 0 | p_age > 100, NA, p_age), 
           p_age_imp = entry_age + year - entry_year,
           p_age_comp = ifelse(is.na(p_age), p_age_imp, p_age),
           age_cat = as.factor(case_when(
             p_age_comp >= 0 & p_age_comp <= 6 ~ "0to6",
             p_age_comp > 6 & p_age_comp <= 12 ~ "6to12",
             p_age_comp > 12 ~ "12to18"
           ))) |> 
    group_by(pid) |> 
    mutate(num_incp = sum(incp)) |> 
    group_by(pid, age_cat) |> 
    summarise(
      incp_share = sum(incp)/mean(num_incp),
      incp_share = ifelse(is.nan(incp_share), 0, incp_share)
    ) |> 
    pivot_wider(
      names_from = age_cat,
      values_from = incp_share,
      names_prefix = "incp_share"
    )
  
  cdoc_df <- cdoc_imp |> 
    filter(year == ms_year) |> 
    select(pid, clg_enrolled, hs_completed, pwt) |> 
    rename(pwt_ms = pwt)
  
  demo_df <- cdoc_imp |> 
    filter(year == entry_year) |> 
    select(pid, hh_racehisp, hh_age, hh_educ, hh_marital) |> 
    rename(hh_racehisp_entry = hh_racehisp,
           hh_age_entry = hh_age
    ) |> 
    mutate(hh_age_gt35_entry = ifelse(hh_age_entry > 35, T, F),
           hh_educ_entry = as.factor(case_when(
             hh_educ < 12 ~ "lths",
             hh_educ >= 12 & hh_educ < 16 ~ "hsc",
             hh_educ >= 16 ~ "gtba"
           )),
           hh_marital_entry = ifelse(hh_marital == "Married", T, F)) |> 
    select(pid, hh_racehisp_entry, hh_age_gt35_entry, hh_educ_entry, hh_marital_entry)
  
  imp_df <- sample_id |> 
    left_join(nwpov_desc, by = "pid") |> 
    left_join(incpov_desc, by = "pid") |> 
    left_join(cdoc_df, by = "pid") |> 
    left_join(pov_desc, by = "pid") |> 
    left_join(cov_df, by = "pid") |> 
    left_join(cdoc_imp |> filter(year == entry_year) |> select(pid, is_first_born, p_gender), by = "pid") |> 
    left_join(nwpov_pct_byage, by = "pid") |> 
    left_join(nwpov_share_byage, by = "pid") |> 
    left_join(incpov_pct_byage, by = "pid") |> 
    left_join(incpov_share_byage, by = "pid") |> 
    left_join(demo_df, by = "pid") |> 
    mutate(longest_spell_nwp = ifelse(is.na(longest_spell_nwp), 0, longest_spell_nwp),
           longest_spell_incp = ifelse(is.na(longest_spell_incp), 0, longest_spell_incp),
           num_spell_nwp = ifelse(is.na(num_spell_nwp), 0, num_spell_nwp),
           num_spell_incp = ifelse(is.na(num_spell_incp), 0, num_spell_incp))
  
  imp_long_df <- imp_long_df %>%
    bind_rows(imp_df)
}

getImpList <- function(MIdata, impIDColumn){
  d <- list()
  imps <- max(MIdata[,impIDColumn])
  for(i in 1:imps){
    d[[i]] <- MIdata[MIdata[,impIDColumn]==i,]
  }
  return(d)
}

library(mitools)
temp <- imp_long_df |> 
  mutate(consec_nwp = factor(case_when(
    num_spell_nwp == 0 & num_nwp == 0 ~ "Never",
    num_spell_nwp == 1 & num_nwp == 1 ~ "One wave",
    num_spell_nwp == 1 & num_nwp > 1 & pct_nwp <= 0.5 ~ "Short consec",
    num_spell_nwp == 1 & num_nwp > 1 & pct_nwp > 0.5 ~ "Long consec", 
    num_spell_nwp > 1 & pct_nwp <= 0.5 ~ "Short fluc",
    num_spell_nwp > 1 & pct_nwp > 0.5 ~ "Long fluc"
  ), levels = c("Never","One wave","Short fluc","Short consec","Long fluc","Long consec")),
  consec_incp = factor(case_when(
    num_spell_incp == 0 & num_incp == 0 ~ "Never",
    num_spell_incp == 1 & num_incp == 1 ~ "One wave",
    num_spell_incp == 1 & num_incp > 1 & pct_incp <= 0.5 ~ "Short consec",
    num_spell_incp == 1 & num_incp > 1 & pct_incp > 0.5 ~ "Long consec", 
    num_spell_incp > 1 & pct_incp <= 0.5 ~ "Short fluc",
    num_spell_incp > 1 & pct_incp > 0.5 ~ "Long fluc"
  ), levels = c("Never","One wave","Short fluc","Short consec","Long fluc","Long consec")))

temp$consec_nwp <- relevel(temp$consec_nwp, ref = "Never")
temp$consec_incp <- relevel(temp$consec_incp, ref = "Never")
temp$hh_racehisp_entry <- relevel(temp$hh_racehisp_entry, ref = "White")
temp$hh_educ_entry <- relevel(temp$hh_educ_entry, ref = "lths")
# temp <- temp |> filter(entry_age == 1)
new_imp <- imputationList(getImpList(temp, c(".imp")))

# Table 2. Distribution ----
pool_mean <- with(imp_long_df, by(imp_long_df, .imp, 
                                  function(x) c(weighted.mean(x$ever_nwp, w = x$pwt) * 100, 
                                                wpct(x$num_nwp, w = x$pwt) * 100,
                                                weighted.mean(x$pct_nwp, w = x$pwt),
                                                wpct(x$longest_spell_nwp, w = x$pwt) * 100,
                                                wpct(x$num_spell_nwp, w = x$pwt) * 100,
                                                weighted.mean(x$ever_incp, w = x$pwt)* 100,
                                                wpct(x$num_incp, w = x$pwt)* 100,
                                                weighted.mean(x$pct_incp, w = x$pwt),
                                                wpct(x$longest_spell_incp, w = x$pwt)* 100,
                                                wpct(x$num_spell_incp, w = x$pwt)* 100)))
# pool_mean[[4]] <- append(pool_mean[[4]], 0, after=29)
# pool_mean[[5]] <- append(pool_mean[[5]], 0, after=29)
nwp_summary_stats <- Reduce("+",pool_mean)/length(pool_mean)
write.csv(t(nwp_summary_stats), file = "temp.csv")


# Table 3. Educ ~ NWP ----
datlist <- miceadds::mids2datlist(new_imp)
reg_summary <- data.frame(term = NA, name = NA)
for (i in c("num_nwp","pct_nwp","num_spell_nwp", "longest_spell_nwp","consec_nwp")){
  for (j in c("hs_completed","clg_enrolled")){
    fit <- with(datlist, glm(formula = get(j) ~ get(i) + incp_prop + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = binomial, weights = pwt))
    betas <- lapply(fit, coef)
    vars <- lapply(fit, FUN = function(x){vcovCL(x, cluster = datlist[[1]]$indfid)}) #vcovCL is in the sandwich package
    reg_temp <- summary(pool_mi(betas, vars)) %>%
      mutate(
        estimate_star = case_when(
          p < 0.1 & p >= 0.05 ~ paste(as.character(round(results,3)), "*", sep = ""),
          p < 0.05 & p >= 0.01 ~ paste(as.character(round(results,3)), "**", sep = ""),
          p < 0.01 & p >= 0 ~ paste(as.character(round(results,3)), "***", sep = ""),
          TRUE ~ as.character(round(results,3))
        ),
        stdev = paste0("(",round(se, 3),")")
      ) %>%
      select(estimate_star, stdev)
    reg_temp$term <- rownames(reg_temp)
    reg_temp <- reg_temp |> pivot_longer(cols = c("estimate_star","stdev"))
    reg_temp <- as.data.frame(reg_temp)
    reg_summary <- reg_summary %>%
      full_join(reg_temp, by = c("term","name"), suffix = c("",paste0(i,j)))
  }
}

write.csv(reg_summary, file = "temp.csv")

datlist <- miceadds::mids2datlist(new_imp)
reg_summary <- data.frame(term = NA, name = NA)
for (i in c("num_nwp","pct_nwp","num_spell_nwp", "longest_spell_nwp","consec_nwp")){
  for (j in c("hs_completed","clg_enrolled")){
    fit <- with(datlist, glm(formula = get(j) ~ incp_prop + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = binomial, weights = pwt))
    betas <- lapply(fit, coef)
    vars <- lapply(fit, FUN = function(x){vcovCL(x, cluster = datlist[[1]]$indfid)}) #vcovCL is in the sandwich package
    reg_temp <- summary(pool_mi(betas, vars)) %>%
      mutate(
        estimate_star = case_when(
          p < 0.1 & p >= 0.05 ~ paste(as.character(round(results,3)), "*", sep = ""),
          p < 0.05 & p >= 0.01 ~ paste(as.character(round(results,3)), "**", sep = ""),
          p < 0.01 & p >= 0 ~ paste(as.character(round(results,3)), "***", sep = ""),
          TRUE ~ as.character(round(results,3))
        ),
        stdev = paste0("(",round(se, 3),")")
      ) %>%
      select(estimate_star, stdev)
    reg_temp$term <- rownames(reg_temp)
    reg_temp <- reg_temp |> pivot_longer(cols = c("estimate_star","stdev"))
    reg_temp <- as.data.frame(reg_temp)
    reg_summary <- reg_summary %>%
      full_join(reg_temp, by = c("term","name"), suffix = c("",paste0(i,j)))
  }
}

write.csv(reg_summary, file = "temp.csv")

reg_summary <- data.frame(term = NA)
for (j in c("hs_completed","clg_enrolled")){
  fit <- with(data = new_imp, exp = glm(get(j) ~ is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = binomial, weights = pwt))
  reg_temp <- summary(pool(fit)) %>%
    mutate(
      estimate_star = case_when(
        p.value < 0.1 & p.value >= 0.05 ~ paste(as.character(round(estimate,3)), "*", sep = ""),
        p.value < 0.05 & p.value >= 0.01 ~ paste(as.character(round(estimate,3)), "**", sep = ""),
        p.value < 0.01 & p.value >= 0 ~ paste(as.character(round(estimate,3)), "***", sep = ""),
        TRUE ~ as.character(round(estimate,3))
      ),
      stdev = paste0("(",round(std.error, 3),")")
    ) %>%
    select(term, estimate_star, stdev)
  reg_summary <- reg_summary %>%
    full_join(reg_temp, by = "term", suffix = c("",j))
}
write.csv(reg_summary, file = "temp.csv")

# Table 5. By age ----
reg_summary <- data.frame(term = NA, name = NA)

for (j in c("hs_completed","clg_enrolled")){
  fit <- with(datlist, glm(formula = get(j) ~ nwp_pct0to6 + nwp_pct6to12 + nwp_pct12to18 + incp_prop + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = binomial, weights = pwt))
  betas <- lapply(fit, coef)
  vars <- lapply(fit, FUN = function(x){vcovCL(x, cluster = datlist[[1]]$indfid)}) #vcovCL is in the sandwich package
  reg_temp <- summary(pool_mi(betas, vars)) %>%
    mutate(
      estimate_star = case_when(
        p < 0.1 & p >= 0.05 ~ paste(as.character(round(results,3)), "*", sep = ""),
        p < 0.05 & p >= 0.01 ~ paste(as.character(round(results,3)), "**", sep = ""),
        p < 0.01 & p >= 0 ~ paste(as.character(round(results,3)), "***", sep = ""),
        TRUE ~ as.character(round(results,3))
      ),
      stdev = paste0("(",round(se, 3),")")
    ) %>%
    select(estimate_star, stdev)
  reg_temp$term <- rownames(reg_temp)
  reg_temp <- reg_temp |> pivot_longer(cols = c("estimate_star","stdev"))
  reg_temp <- as.data.frame(reg_temp)
  reg_summary <- reg_summary %>%
    full_join(reg_temp, by = c("term","name"), suffix = c("",paste0(j)))
}


write.csv(reg_summary, file = "temp.csv")


reg_summary <- data.frame(term = NA, name = NA)
for (j in c("hs_completed","clg_enrolled")){
  fit <- with(datlist, glm(formula = get(j) ~ nwp_share0to6 + nwp_share6to12 + nwp_share12to18 + incp_prop + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = binomial, weights = pwt))
  betas <- lapply(fit, coef)
  vars <- lapply(fit, FUN = function(x){vcovCL(x, cluster = datlist[[1]]$indfid)}) #vcovCL is in the sandwich package
  reg_temp <- summary(pool_mi(betas, vars)) %>%
    mutate(
      estimate_star = case_when(
        p < 0.1 & p >= 0.05 ~ paste(as.character(round(results,3)), "*", sep = ""),
        p < 0.05 & p >= 0.01 ~ paste(as.character(round(results,3)), "**", sep = ""),
        p < 0.01 & p >= 0 ~ paste(as.character(round(results,3)), "***", sep = ""),
        TRUE ~ as.character(round(results,3))
      ),
      stdev = paste0("(",round(se, 3),")")
    ) %>%
    select(estimate_star, stdev)
  reg_temp$term <- rownames(reg_temp)
  reg_temp <- reg_temp |> pivot_longer(cols = c("estimate_star","stdev"))
  reg_temp <- as.data.frame(reg_temp)
  reg_summary <- reg_summary %>%
    full_join(reg_temp, by = c("term","name"), suffix = c("",paste0(j)))
}


write.csv(reg_summary, file = "temp.csv")


reg_summary <- data.frame(term = NA)
for (j in c("hs_completed","clg_enrolled")){
  fit <- with(data = new_imp, exp = glm(get(j) ~ nwp_pct0to6 + nwp_pct6to12 + nwp_pct12to18 + incp_prop + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = binomial, weights = pwt))
  reg_temp <- summary(pool(fit)) %>%
    mutate(
      estimate_star = case_when(
        p.value < 0.1 & p.value >= 0.05 ~ paste(as.character(round(estimate,3)), "*", sep = ""),
        p.value < 0.05 & p.value >= 0.01 ~ paste(as.character(round(estimate,3)), "**", sep = ""),
        p.value < 0.01 & p.value >= 0 ~ paste(as.character(round(estimate,3)), "***", sep = ""),
        TRUE ~ as.character(round(estimate,3))
      ),
      stdev = paste0("(",round(std.error, 3),")")
    ) %>%
    select(term, estimate_star, stdev)
  reg_summary <- reg_summary %>%
    full_join(reg_temp, by = "term", suffix = c("",j))
}
write.csv(reg_summary, file = "temp.csv")



# Share of poverty waves in each developmental stage
reg_summary <- data.frame(term = NA)
for (j in c("hs_completed","clg_enrolled")){
  fit <- with(data = new_imp, exp = glm(get(j) ~ nwp_share0to6 + nwp_share6to12 + nwp_share12to18 + incp_prop + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = binomial, weights = pwt))
  reg_temp <- summary(pool(fit)) %>%
    mutate(
      estimate_star = case_when(
        p.value < 0.1 & p.value >= 0.05 ~ paste(as.character(round(estimate,3)), "*", sep = ""),
        p.value < 0.05 & p.value >= 0.01 ~ paste(as.character(round(estimate,3)), "**", sep = ""),
        p.value < 0.01 & p.value >= 0 ~ paste(as.character(round(estimate,3)), "***", sep = ""),
        TRUE ~ as.character(round(estimate,3))
      ),
      stdev = paste0("(",round(std.error, 3),")")
    ) %>%
    select(term, estimate_star, stdev)
  reg_summary <- reg_summary %>%
    full_join(reg_temp, by = "term", suffix = c("",j))
}
write.csv(reg_summary, file = "temp.csv")

# Table 6. Income Mirror ----
reg_summary <- data.frame(term = NA, name = NA)
for (i in c("num_","pct_","num_spell_", "longest_spell_")){
  for (j in c("hs_completed","clg_enrolled")){
    fit <- with(datlist, glm(formula = get(j) ~ get(paste0(i,"nwp")) + get(paste0(i,"incp")) + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = binomial, weights = pwt))
    betas <- lapply(fit, coef)
    vars <- lapply(fit, FUN = function(x){vcovCL(x, cluster = datlist[[1]]$indfid)}) #vcovCL is in the sandwich package
    reg_temp <- summary(pool_mi(betas, vars)) %>%
      mutate(
        estimate_star = case_when(
          p < 0.1 & p >= 0.05 ~ paste(as.character(round(results,3)), "*", sep = ""),
          p < 0.05 & p >= 0.01 ~ paste(as.character(round(results,3)), "**", sep = ""),
          p < 0.01 & p >= 0 ~ paste(as.character(round(results,3)), "***", sep = ""),
          TRUE ~ as.character(round(results,3))
        ),
        stdev = paste0("(",round(se, 3),")")
      ) %>%
      select(estimate_star, stdev)
    reg_temp$term <- rownames(reg_temp)
    reg_temp <- reg_temp |> pivot_longer(cols = c("estimate_star","stdev"))
    reg_temp <- as.data.frame(reg_temp)
    reg_summary <- reg_summary %>%
      full_join(reg_temp, by = c("term","name"), suffix = c("",paste0(i,j)))
  }
}

write.csv(reg_summary, file = "temp.csv")


reg_summary <- data.frame(term = NA)
for (i in c("num_","pct_","num_spell_", "longest_spell_")){
  for (j in c("hs_completed","clg_enrolled")){
    fit <- with(data = new_imp, exp = glm(get(j) ~ get(paste0(i,"nwp")) + get(paste0(i,"incp")) + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = binomial, weights = pwt))
    reg_temp <- summary(pool(fit)) %>%
      mutate(
        estimate_star = case_when(
          p.value < 0.1 & p.value >= 0.05 ~ paste(as.character(round(estimate,3)), "*", sep = ""),
          p.value < 0.05 & p.value >= 0.01 ~ paste(as.character(round(estimate,3)), "**", sep = ""),
          p.value < 0.01 & p.value >= 0 ~ paste(as.character(round(estimate,3)), "***", sep = ""),
          TRUE ~ as.character(round(estimate,3))
        ),
        stdev = paste0("(",round(std.error, 3),")")
      ) %>%
      select(term, estimate_star, stdev)
    reg_summary <- reg_summary %>%
      full_join(reg_temp, by = "term", suffix = c("",i))
  }
}
write.csv(reg_summary, file = "temp.csv")



# Table 7. Income Mirror, by age ----
reg_summary <- data.frame(term = NA, name = NA)

for (j in c("hs_completed","clg_enrolled")){
  fit <- with(datlist, glm(formula = get(j) ~ nwp_pct0to6 + nwp_pct6to12 + nwp_pct12to18 + incp_pct0to6 + incp_pct6to12 + incp_pct12to18 + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = binomial, weights = pwt))
  betas <- lapply(fit, coef)
  vars <- lapply(fit, FUN = function(x){vcovCL(x, cluster = datlist[[1]]$indfid)}) #vcovCL is in the sandwich package
  reg_temp <- summary(pool_mi(betas, vars)) %>%
    mutate(
      estimate_star = case_when(
        p < 0.1 & p >= 0.05 ~ paste(as.character(round(results,3)), "*", sep = ""),
        p < 0.05 & p >= 0.01 ~ paste(as.character(round(results,3)), "**", sep = ""),
        p < 0.01 & p >= 0 ~ paste(as.character(round(results,3)), "***", sep = ""),
        TRUE ~ as.character(round(results,3))
      ),
      stdev = paste0("(",round(se, 3),")")
    ) %>%
    select(estimate_star, stdev)
  reg_temp$term <- rownames(reg_temp)
  reg_temp <- reg_temp |> pivot_longer(cols = c("estimate_star","stdev"))
  reg_temp <- as.data.frame(reg_temp)
  reg_summary <- reg_summary %>%
    full_join(reg_temp, by = c("term","name"), suffix = c("",paste0(j)))
}


write.csv(reg_summary, file = "temp.csv")


reg_summary <- data.frame(term = NA, name = NA)
for (j in c("hs_completed","clg_enrolled")){
  fit <- with(datlist, glm(formula = get(j) ~ nwp_share0to6 + nwp_share6to12 + nwp_share12to18 + incp_share0to6 + incp_share6to12 + incp_share12to18 + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = binomial, weights = pwt))
  betas <- lapply(fit, coef)
  vars <- lapply(fit, FUN = function(x){vcovCL(x, cluster = datlist[[1]]$indfid)}) #vcovCL is in the sandwich package
  reg_temp <- summary(pool_mi(betas, vars)) %>%
    mutate(
      estimate_star = case_when(
        p < 0.1 & p >= 0.05 ~ paste(as.character(round(results,3)), "*", sep = ""),
        p < 0.05 & p >= 0.01 ~ paste(as.character(round(results,3)), "**", sep = ""),
        p < 0.01 & p >= 0 ~ paste(as.character(round(results,3)), "***", sep = ""),
        TRUE ~ as.character(round(results,3))
      ),
      stdev = paste0("(",round(se, 3),")")
    ) %>%
    select(estimate_star, stdev)
  reg_temp$term <- rownames(reg_temp)
  reg_temp <- reg_temp |> pivot_longer(cols = c("estimate_star","stdev"))
  reg_temp <- as.data.frame(reg_temp)
  reg_summary <- reg_summary %>%
    full_join(reg_temp, by = c("term","name"), suffix = c("",paste0(j)))
}


write.csv(reg_summary, file = "temp.csv")


reg_summary <- data.frame(term = NA)
for (j in c("hs_completed","clg_enrolled")){
  fit <- with(data = new_imp, exp = glm(get(j) ~ nwp_pct0to6 + nwp_pct6to12 + nwp_pct12to18 + incp_pct0to6 + incp_pct6to12 + incp_pct12to18 + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = binomial, weights = pwt))
  reg_temp <- summary(pool(fit)) %>%
    mutate(
      estimate_star = case_when(
        p.value < 0.1 & p.value >= 0.05 ~ paste(as.character(round(estimate,3)), "*", sep = ""),
        p.value < 0.05 & p.value >= 0.01 ~ paste(as.character(round(estimate,3)), "**", sep = ""),
        p.value < 0.01 & p.value >= 0 ~ paste(as.character(round(estimate,3)), "***", sep = ""),
        TRUE ~ as.character(round(estimate,3))
      ),
      stdev = paste0("(",round(std.error, 3),")")
    ) %>%
    select(term, estimate_star, stdev)
  reg_summary <- reg_summary %>%
    full_join(reg_temp, by = "term", suffix = c("",j))
}
write.csv(reg_summary, file = "temp.csv")

reg_summary <- data.frame(term = NA)
for (j in c("hs_completed","clg_enrolled")){
  fit <- with(data = new_imp, exp = glm(get(j) ~ nwp_share0to6 + nwp_share6to12 + nwp_share12to18 + incp_share0to6 + incp_share6to12 + incp_share12to18+ is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = binomial, weights = pwt))
  reg_temp <- summary(pool(fit)) %>%
    mutate(
      estimate_star = case_when(
        p.value < 0.1 & p.value >= 0.05 ~ paste(as.character(round(estimate,3)), "*", sep = ""),
        p.value < 0.05 & p.value >= 0.01 ~ paste(as.character(round(estimate,3)), "**", sep = ""),
        p.value < 0.01 & p.value >= 0 ~ paste(as.character(round(estimate,3)), "***", sep = ""),
        TRUE ~ as.character(round(estimate,3))
      ),
      stdev = paste0("(",round(std.error, 3),")")
    ) %>%
    select(term, estimate_star, stdev)
  reg_summary <- reg_summary %>%
    full_join(reg_temp, by = "term", suffix = c("",j))
}
write.csv(reg_summary, file = "temp.csv")



